**************************************************************
* Produce Summary Statistics
* 1. Individual level
* 2. Daily level
* 3. Quotes and orders
* 4. Claims
**************************************************************



clear 
clear matrix
set memory 1000m
set more off
cap log close

cd "/Users/..."

global do_file="‎⁨/Users/.../do_file"
global log_file="/Users/.../log_file"
global raw_data="/Users/.../raw_data⁩⁩"
global working_data="/Users/.../working_data"
global results="/Users/.../results"


log using log_file/sum_day_072020.log, replace


**************************************
* Summary Statistics: individual level 
**************************************

* get user_id who has daily data
use working_data/clean_day_level_geo_weather_081519.dta, clear
keep user_id
bysort user_id: keep if _n==1
save working_data/user_id_day_level_081619.dta, replace


** load trip level data, summary
use working_data/clean_trip_level_geo_weather_081519.dta, clear
merge m:1 user_id using working_data/user_id_day_level_081619.dta
drop if _merge~=3
drop _merge

sort user_id start_time_new
bysort user_id: gen trip_index=_n
bysort user_id: gen total_trips=_N

sum total_trips if trip_index==1

count 
count if trip_index==1


** load day-level data, summary
use working_data/clean_day_level_geo_weather_081519.dta, clear

sort user_id trip_start_date
bysort user_id: gen day_index=_n
bysort user_id: gen total_days=_N

tsset user_id day_index

gen trip_start_date_new=date(trip_start_date,"YMD")
format trip_start_date_new %td

gen temp=trip_start_date_new if day_index==1
bysort user_id: egen first_drive_date=total(temp)
drop temp

gen temp=trip_start_date_new if day_index==total_days
bysort user_id: egen last_drive_date=total(temp)
drop temp

gen active_days=last_drive_date-first_drive_date+1
gen active_ratio=total_days/active_days

merge m:1 user_id using working_data/quote_user_id.dta
drop if _merge==2

gen quote_ever=1 if _merge==3
replace quote_ever=0 if _merge==1
drop _merge

sum quote_ever total_days active_days active_ratio if day_index==1 

tab prov if day_index==1 

** produce graphs

//hist active_ratio if day_index==1
//graph export $results/figures/active_ratio.png, replace



**************************************
* Summary Statistics: Day level 
**************************************

** generate day level dummies

gen total_prev_hard_brake_dummy=0 
replace total_prev_hard_brake_dummy=1 if total_prev_hard_brake>0

gen total_real_hard_brake_1_dummy=0
replace total_real_hard_brake_1_dummy=1 if total_real_hard_brake_1>0

gen total_hard_brake_dummy=0
replace total_hard_brake_dummy=1 if total_hard_brake>0



gen total_hard_left_dummy=0
replace total_hard_left_dummy=1 if total_hard_left>0

gen total_hard_right_dummy=0
replace total_hard_right_dummy=1 if total_hard_right>0

gen total_hard_u_dummy=0
replace total_hard_u_dummy=1 if total_hard_u>0

gen total_hard_turn_dummy=0
replace total_hard_turn_dummy=1 if total_hard_turn>0


gen both_1=1 if total_prev_hard_brake>0 & total_hard_turn>0
replace both_1=0 if both_1==.

gen both_2=1 if total_real_hard_brake_1>0 & total_hard_turn>0
replace both_2=0 if both_2==.

gen both_3=1 if total_hard_brake>0 & total_hard_turn>0
replace both_3=0 if both_3==.


** summarize near-miss related variables: dummies

sum total_prev_hard_brake_dummy total_real_hard_brake_1_dummy total_hard_brake_dummy ///
	total_hard_left_dummy total_hard_right_dummy total_hard_u_dummy total_hard_turn_dummy ///
	both_1 both_2 both_3

sum total_prev_hard_brake total_real_hard_brake_2 total_hard_brake ///
	total_hard_left total_hard_right total_hard_u total_hard_turn ///
	 
	
	
** summarize other variables
 
gen rain_snow=1 if rain>0 | rain_storm>0 | snow>0
replace rain_snow=0 if rain_snow==.

gen other=1 if cloudy>0 | windy>0 | foggy>0
replace other=0 if other==.

sum	control_score cautious_score focused_score drive_score ///
	weekend rush_hour total_traffic_jam ///
	high_temper low_temper sunny rain_snow other 
	

	
** summarize risky behavior	

sum total_phone_use distance duration speed drive_at_night highway 	
	

** produce graphs
sort user_id day_index

twoway (histogram gap_time if gap_time<30, width(1)) ///
       (histogram gap_time if gap_time<30  & l.total_prev_hard_brake>0, fcolor(none) lcolor(black) width(1)) ///
           ,ylabel(,angle(horizontal)) legend(order(1 "Full Sample" 2 "After A Near-Miss" )) xtitle("The # of days between two driving days") ///
plotregion(color(white)) graphregion(color(white)) bgcolor(white) 		   
graph export $results/figures/gap_day_prev.png, replace 	



***************************************
* Summary Statistics: Quotes and Orders
***************************************


use working_data/merge_quote_drive_081619.dta, clear


** first: find the number of people who quoted and purchased

tab quote_ind, m
bysort user_id: gen temp=_n
count if temp==1
bysort user_id: egen total_quote=total(quote_ind)


tab order_ins, m
bysort user_id: egen total_order=total(order_ins)
tab total_order if temp==1, m
count if temp==1 & total_order>0


hist total_quote if temp==1
hist total_order if temp==1

sum total_quote if temp==1, d
sum total_order if temp==1 & total_order>0, d


** summarize number of quotes and amount quoted if amount>0

sum avg_total_fee
sum avg_jiaoq if avg_jiaoq>0 & avg_jiaoq~=.
sum avg_premium if avg_premium>0 & avg_premium~=.
sum avg_chesun if avg_chesun>0 & avg_chesun~=.
sum avg_chesun_bj if avg_chesun_bj>0 & avg_chesun_bj~=.
sum avg_sanzhe if avg_sanzhe>0 & avg_sanzhe~=.
sum avg_sanzhe_bj if avg_sanzhe_bj>0 & avg_sanzhe_bj~=.
sum avg_daoq if avg_daoq>0 & avg_daoq~=.
sum avg_daoq_bj if avg_daoq_bj>0 & avg_daoq_bj~=.
sum avg_other_fee if avg_other_fee>0 & avg_other_fee~=.


* summarize number of orders and amount ordered if amount>0

sum order_total_fee
sum order_jiaoq_fee if order_jiaoq_fee>0 & order_jiaoq_fee~=.
sum order_premium_fee if order_premium_fee>0 & order_premium_fee~=.
sum order_chesun if order_chesun>0 & order_chesun~=.
sum order_chesun_bj if order_chesun_bj>0 & order_chesun_bj~=.
sum order_sanzhe if order_sanzhe>0 & order_sanzhe~=.
sum order_sanzhe_bj if order_sanzhe_bj>0 & order_sanzhe_bj~=.
sum order_daoq if order_daoq>0 & order_daoq~=.
sum order_daoq_bj if order_daoq_bj>0 & order_daoq_bj~=.
sum order_other_fee if order_other_fee>0 & order_other_fee~=.



****************************
* Summary Statistics: Claims
****************************

use working_data/claim_clean_further_07032019.dta, clear

bysort vin: gen temp=_n
count if temp==1

gen year_month=substr(claim_date, 1, 7)
tab year_month

count

bysort vin: gen total_claims=_N
sum total_claims if temp==1

gen amt_reimburse=total_fee_jiaoq+total_fee_sy
gen amt_est=labor_fee_jiaoq+labor_fee_sy+material_fee_jiaoq+material_fee_sy

egen amt_max=rowmax(amt_reimburse amt_est) if total_claims>0


sum amt_reimburse if amt_reimburse>0
sum amt_est if amt_est>0
sum amt_max if amt_max>0
